package com.project.oj.recommend.impl;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.seg.common.Term;
import com.project.oj.recommend.TextPreprocessor;
import org.springframework.stereotype.Service;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Segments text with HanLP and keeps only the words whose part-of-speech
 * tag is on a fixed allow-list.
 */
@Service
public class TextAnalysisService {

    /**
     * Part-of-speech tag names (as rendered by {@code term.nature.toString()})
     * that are kept by {@link #preprocessText(String)}.
     *
     * <p>Stored as a set so membership is an exact match; a substring test
     * against a comma-joined string would also accept tags such as
     * {@code ""} or {@code ","}.
     */
    private static final Set<String> ALLOWED_NATURES = Collections.unmodifiableSet(
            new HashSet<>(Arrays.asList("n", "v", "vd", "vn", "a", "d")));

    /**
     * Splits {@code text} into terms and returns the words of the terms whose
     * part-of-speech tag is allowed, in their original order.
     *
     * @param text the text to segment; {@code null} or empty yields an empty list
     * @return the retained words, never {@code null}
     */
    public List<String> preprocessText(String text) {
        // Skip the segmenter for null/empty input instead of letting it fail on null.
        List<Term> terms = (text == null || text.isEmpty())
                ? Collections.<Term>emptyList()
                : HanLP.segment(text);
        return terms.stream()
                .filter(this::shouldInclude)
                .map(term -> term.word)
                .collect(Collectors.toList());
    }

    /**
     * Tells whether a term's part-of-speech tag is on the allow-list.
     *
     * @param term the segmented term to check
     * @return {@code true} if the term has a tag and that tag exactly equals
     *         one of the allowed tag names; {@code false} otherwise
     */
    private boolean shouldInclude(Term term) {
        // Defensive: a term without a tag cannot match and must not cause an NPE.
        if (term == null || term.nature == null) {
            return false;
        }
        return ALLOWED_NATURES.contains(term.nature.toString());
    }
}